##Replication code for: 
##"Combining Patronage and Merit in Public Sector Recruitment" (JOP)
##Sarah Brierley

####-----
## Packages
####-----
library(stargazer)
library(dplyr)
library(xtable)
library(ggplot2)
library(ggrepel)
library(cowplot) # for side by side plots in ggplot 
library(lubridate)
library(arm)

####-----
## Bring in the data
####-----
rm(list=ls())

## DOWNLOAD AND SAVE DATA -- main_rep_data.csv
## SET Working directory 
## BRING IN THE CLEAN DATA 
data<-read.csv("main_rep_data.csv",header=T)

####-----
##Table 1: Difference-in-means test (ethnic group coding)
####-----

## Group the data by hiring period, and menial vs. professional
group_hiring <- dplyr::group_by(data, period2, menial)

group_hiring_sum <- summarise(group_hiring,
                              ndc_pct1=round(mean(ethnNDC*100), 2), 
                              ndc_tot1= sum(ethnNDC),
                              npp_pct1=round(mean(ethnNPP*100), 2), 
                              npp_tot1= sum(ethnNPP))

table <- t(group_hiring_sum)

ndc_menial_share1 <- table[3,2]
ndc_menial_share2 <- table[3,4]
ndc_prof_share1 <- table[3,1]
ndc_prof_share2 <- table[3,3]

npp_menial_share1 <- table[5,2]
npp_menial_share2 <- table[5,4]
npp_prof_share1 <- table[5,1]
npp_prof_share2 <- table[5,3]

ndc_men_diff <- ndc_menial_share2 - ndc_menial_share1 
ndc_prof_diff <- ndc_prof_share2 - ndc_prof_share1
npp_men_diff <- npp_menial_share2 - npp_menial_share1 
npp_prof_diff <- npp_prof_share2 - npp_prof_share1

## Coded by Ethnicity 
t1 <- round(t.test(data$ethnNDC[data$period2==0 & data$menial==1], data$ethnNDC[data$period2==1 & data$menial==1])$p.value, 3)
t2 <- round(t.test(data$ethnNDC[data$period2==0 & data$menial==0], data$ethnNDC[data$period2==1 & data$menial==0])$p.value, 3)

t3 <- round(t.test(data$ethnNPP[data$period2==0 & data$menial==1], data$ethnNPP[data$period2==1 & data$menial==1])$p.value, 3)
t4 <- round(t.test(data$ethnNPP[data$period2==0 & data$menial==0], data$ethnNPP[data$period2==1 & data$menial==0])$p.value, 3)

row_names <- cbind("Period 1", "Period 2", "Difference", "P-value")
row1 <- cbind(ndc_menial_share1, ndc_menial_share2, ndc_men_diff, t1)
row2 <- cbind(ndc_prof_share1, ndc_prof_share2, ndc_prof_diff, t2)
row3 <- cbind(npp_menial_share1, npp_menial_share2, npp_men_diff, t3)
row4 <- cbind(npp_prof_share1, npp_prof_share2, npp_prof_diff, t4)

table <- rbind(row_names, row1, row2, row3, row4)

##----
## Print out Table 1
##----
table

####-----
#Figure 1: Share of bureaucrat types across two electoral periods (2005–2008, 2009–2012)
####-----

tab <- table(data$yr[data$menial==0 & data$yr!=2013], data$b_type.2[data$menial==0 & data$yr!=2013])
mytab <- round(prop.table(tab, 1)*100,1) # column percentages
df1 <- as.data.frame(mytab)
names(df1) <- c("year", "btype", "share")

plot1 <- ggplot(data=df1, aes(x=year, y=share, group=btype, colour=btype)) +
  geom_line(size=1.5, aes(linetype=btype)) + 
  geom_point(size=2, fill="white") +
  xlab("Year") + ylab("Share of hires") +ggtitle("Professional") +
  scale_colour_hue(name="Employee Type")  +
  scale_linetype_manual(values=c("solid", "twodash", "dotted"))+
  scale_color_manual(values = c("black", "black", "black")) + 
  theme_bw() +
  theme(axis.text.y = element_text(size=15),
        axis.text.x = element_text(size=15),
        axis.title.x=element_text(size=15), 
        axis.title.y=element_text(size=18), 
        legend.text=element_text(size=15),
        legend.position="bottom", 
        legend.title=element_blank(), 
        legend.key.size = unit(4,"line"), 
        plot.title = element_text(hjust = 0.5)) +
  
  #legend.text=element_text(size=15), 
  ylim(0, 60) +
  geom_vline(xintercept=which(df1$year == "2009"), linetype="dotted", size=1)

plot1
## Plot of hiring patterns (Menial)

tab1 <- table(data$yr[data$menial==1& data$yr!=2013], data$b_type.2[data$menial==1& data$yr!=2013])
mytab1<- round(prop.table(tab1, 1)*100,1) # column percentages
df2 <- as.data.frame(mytab1)
names(df2) <- c("year", "btype", "share")
#df2 <- filter(df1, year!="2015" & year!="2001" &  year!="2002"  & year!="2003" & year!="2004" & year!="2013" & year!="2014" )

plot2 <- ggplot(data=df2, aes(x=year, y=share, group=btype, colour=btype)) +
  geom_line(size=1.5, aes(linetype=btype)) + 
  geom_point(size=2, fill="white") +
  xlab("Year") + ylab("Share of hires") + ggtitle("Menial") +
  scale_colour_hue(name="Employee Type")  +
  scale_linetype_manual(values=c("solid", "twodash", "dotted"))+
  scale_color_manual(values = c("black", "black", "black")) + 
  theme_bw() +
  theme(axis.text.y = element_text(size=15),
        axis.text.x = element_text(size=15),
        axis.title.x=element_text(size=15), 
        axis.title.y=element_text(size=18), 
        legend.text=element_text(size=15),
        legend.position="bottom", 
        legend.title=element_blank(),
        legend.key.size = unit(4,"line"), 
        plot.title = element_text(hjust = 0.5)) +
  ylim(0, 60) +
  geom_vline(xintercept=which(df2$year == "2009"), linetype="dotted", size=1.0)

plot2

plot_grid(plot1, plot2, align='h', rel_widths = c(1,1))



####-----
##Table 2: Logistic regression predicting hiring of partisan bureaucrats across each time period
####-----


reg1<- glm(ethnNDC ~ period2 + menial + men_per2 + male + age_hires_years + edu2 + edu3 + edu4, family=binomial(link="logit"), data=data)

reg2 <- glm(swing ~ period2 + menial + men_per2 + male + age_hires_years + edu2 + edu3 + edu4, family=binomial(link="logit"), data=data)
summary(reg2)

reg3<- glm(ethnNPP ~ period2 + menial + men_per2 + male + age_hires_years + edu2 + edu3 + edu4, family=binomial(link="logit"), data=data)
summary(reg3)

stargazer(reg1, reg2, reg3)


####-----
##Figure 2: Difference in the predicted probability of a pro-NDC and pro-NPP bureaucrat being hired in each term, disaggregated by job type.
####-----


##------------------------
## Predicted probabilities
##------------------------

## -- REG 1 -- ## 

hir<-c(1,0,1,0)
dis<-c(1,1,0,0)
hir_des<-c(1,0,0,0)
pred.prob <- c() 
sd.prob <- c()

for(i in 1:4){
  temp<-data%>%
    dplyr::mutate(int=1, period2=hir[i], menial=dis[i], men_per2=hir_des[i])%>%
    dplyr::select(int, period2, menial, men_per2, male, age_hires_years, edu2, edu3, edu4)
  predict <- predict(reg1, temp, type="response", se.fit = T)
  pred.prob[i]= mean(predict[[1]], na.rm=T)  
  # Second list contains the standard errors
  sd.prob[i] = mean(predict[[2]], na.rm=T)
  print(pred.prob)
  print(sd.prob)
}

diff <- c((pred.prob[1] - pred.prob[2]), (pred.prob[3] - pred.prob[4]))
se_diff <- c(sqrt(sd.prob[1]^2 + sd.prob[2]^2), sqrt(sd.prob[3]^2 + sd.prob[4]^2))

lower <- diff[1] - 1.96* se_diff[1]
upper <- diff[1] + 1.96* se_diff[1]

lower2 <- diff[2] - 1.96* se_diff[2]
upper2 <- diff[2] + 1.96* se_diff[2]

points1 <- diff

lowers1 <- c(lower,  lower2)
uppers1 <- c(upper, upper2)

## Plot 

## FIGURE -- INCUMBENT SUPPORTERS

par(mgp=c(5,1,0))
par(mar=c(8,8, 4, 2) + 0.1)
par(mfrow=c(2,1))

#par(mfrow=c(1,1))

plot(points1, c(0.2,0.3), axes=F, pch=19, xlim=c(-.15, .15), ylab="", cex=1.5, xlab="Difference in predicted probabilities", cex.axis=1.4, cex.lab=1.4, sub="(Pro-NDC)", cex.sub=1.4)
segments(lowers1, c(0.2,0.3), uppers1, c(0.2, 0.3), lwd=2)
abline(v=0, lty=2, lwd=2)
#abline(h=0.5, lwd=2)
axis(side = 2, at = c(0.2, 0.3), labels=c("Menial", "Professional"), las=1, cex.axis=1.5)
axis(side = 1, pos = 0.19)


##-------
##  Same thing For REG 3
##------

hir<-c(1,0,1,0)
dis<-c(1,1,0,0)
hir_des<-c(1,0,0,0)
pred.prob <- c() 
sd.prob <- c()

for(i in 1:4){
  temp<-data%>%
    dplyr::mutate(int=1, period2=hir[i], menial=dis[i], men_per2=hir_des[i])%>%
    dplyr::select(int, period2, menial, men_per2, male, age_hires_years, edu2, edu3, edu4)
  predict <- predict(reg3, temp, type="response", se.fit = T)
  pred.prob[i]= mean(predict[[1]], na.rm=T)  
  # Second list contains the standard errors
  sd.prob[i] = mean(predict[[2]], na.rm=T)
  print(pred.prob)
  print(sd.prob)
}

diff <- c((pred.prob[1] - pred.prob[2]), (pred.prob[3] - pred.prob[4]))
se_diff <- c(sqrt(sd.prob[1]^2 + sd.prob[2]^2), sqrt(sd.prob[3]^2 + sd.prob[4]^2))

lower <- diff[1] - 1.96* se_diff[1]
upper <- diff[1] + 1.96* se_diff[1]

lower2 <- diff[2] - 1.96* se_diff[2]
upper2 <- diff[2] + 1.96* se_diff[2]

points1 <- diff

lowers1 <- c(lower,  lower2)
uppers1 <- c(upper, upper2)

## Plot 

## FIGURE -- INCUMBENT SUPPORTERS

par(mgp=c(5,1,0))
par(mar=c(8,8, 4, 2) + 0.1)
#par(mfrow=c(1,1))

plot(points1, c(0.2,0.3), axes=F, pch=19, xlim=c(-.15, .15), ylab="", cex=1.5, xlab="Difference in predicted probabilities", cex.axis=1.4, cex.lab=1.4, sub="(Pro-NPP)", cex.sub=1.4)
segments(lowers1, c(0.2,0.3), uppers1, c(0.2, 0.3), lwd=2)
abline(v=0, lty=2, lwd=2)
#abline(h=0.5, lwd=2)
axis(side = 2, at = c(0.2, 0.3), labels=c("Menial", "Professional"), las=1, cex.axis=1.5)
axis(side = 1, pos = 0.19)

###_-----
## FOR REPLICATION OF TABLE 3: SEE PAPER_REPLICATE_MENIAL.r
###------